# Functions
# Plot color palette
plot_color_palette <- function(input_cols) {
col_data <- tibble(color = input_cols) %>%
mutate(color = fct_inorder(color))
res <- col_data %>%
ggplot(aes(x = "color", fill = color)) +
geom_bar() +
scale_fill_manual(values = input_cols) +
theme_void()
res
}
# Pull items from list of vectors
pull_nest_vec <- function(list_in, idx) {
res <- map_chr(list_in, ~ .x[[idx]])
res
}
# Capitalize first character without modifying other characters
str_to_title_v2 <- function(string) {
cap_first_chr <- function(string) {
chrs <- string %>%
str_split(pattern = "") %>%
unlist()
if (any(chrs %in% LETTERS)) {
return(string)
}
chrs[1] <- str_to_upper(chrs[1])
res <- chrs %>%
reduce(str_c)
res
}
res <- string %>%
map_chr(~ {
.x %>%
str_split(pattern = " ") %>%
unlist() %>%
map_chr(cap_first_chr) %>%
reduce(str_c, sep = " ")
})
res
}
# Set colors
set_cols <- function(types_in, cols_in, other_cols) {
types_in <- types_in[!types_in %in% names(other_cols)]
cols_in <- cols_in[!cols_in %in% other_cols]
names(cols_in) <- types_in
cols_in <- cols_in[!is.na(names(cols_in))]
res <- c(cols_in, other_cols)
res
}
# Set subtype colors
set_type_cols <- function(type_in, sobjs_in, type_key, type_column = "subtype",
cols_in, other_cols) {
sobjs_in <- sobjs_in[type_key[names(sobjs_in)] == type_in]
res <- sobjs_in %>%
map(~ {
.x@meta.data %>%
pull(type_column) %>%
unique()
}) %>%
reduce(c) %>%
unique() %>%
set_cols(
cols_in = cols_in,
other_cols = other_cols
)
res
}
# Import matrices and create Seurat object
create_sobj <- function(matrix_dir, proj_name, gene_min = 100, gene_max = 5000,
mito_max = 25, mito_str = "^mt-", min_cells = 0, ...) {
# Load matrices
mat_list <- Read10X(matrix_dir)
rna_mat <- mat_list
# Create Seurat object using gene expression data
if (is_list(mat_list)) {
rna_mat <- mat_list[["Gene Expression"]]
}
res <- rna_mat %>%
CreateSeuratObject(
project = proj_name,
min.cells = min_cells
)
# Add antibody capture data to Seurat object
if (is_list(mat_list)) {
adt_mat <- res %>%
colnames() %>%
mat_list[["Antibody Capture"]][, .]
res[["ADT"]] <- CreateAssayObject(adt_mat)
}
# Calculate percentage of mitochondrial reads
res <- res %>%
PercentageFeatureSet(
pattern = mito_str,
col.name = "Percent_mito"
)
# Add QC classifications to meta.data
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_ids") %>%
mutate(qc_class = "Pass filters") %>%
column_to_rownames("cell_ids")
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
qc_class = str_replace(qc_class, "Singlet", "Pass filters"),
qc_class = ifelse(nFeature_RNA <= gene_min, "Low gene count", qc_class),
qc_class = ifelse(nFeature_RNA >= gene_max, "High gene count", qc_class),
qc_class = ifelse(Percent_mito >= mito_max, "High mito reads", qc_class)
) %>%
column_to_rownames("cell_id")
res
}
# Filter and normalize matrices
norm_sobj <- function(sobj_in, sample_order = NULL) {
res <- sobj_in %>%
subset(qc_class == "Pass filters") %>%
NormalizeData(normalization.method = "LogNormalize")
if ("ADT" %in% names(res)) {
res <- res %>%
NormalizeData(
assay = "ADT",
normalization.method = "CLR"
)
}
# Add meta.data columns
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_ids") %>%
mutate(orig.ident = fct_relevel(orig.ident, sample_order)) %>%
column_to_rownames("cell_ids")
res
}
# Cluster gene expression data
cluster_RNA <- function(sobj_in, assay = "RNA", resolution = 1,
dims = 1:30, prefix = "", ...) {
# Use FindNeighbors to construct a K-nearest neighbors graph based on the euclidean distance in
# PCA space, and refine the edge weights between any two cells based on the
# shared overlap in their local neighborhoods (Jaccard similarity).
# Use FindClusters to apply modularity optimization techniques such as the Louvain algorithm
# (default) or SLM, to iteratively group cells together
# Perform PCA
# By default only variable features are used for PCA
res <- sobj_in %>%
RunPCA(assay = assay, ...) %>%
AddMetaData(
metadata = FetchData(., c("PC_1", "PC_2")),
col.name = str_c(prefix, c("PC_1", "PC_2"))
)
# Create nearest neighbors graph and find clusters
res <- res %>%
FindNeighbors(
assay = assay,
reduction = "pca",
dims = dims
) %>%
FindClusters(
resolution = resolution,
verbose = F
) %>%
AddMetaData(
metadata = Idents(.),
col.name = str_c(assay, "_clusters")
)
# Run UMAP, UMAP coordinates will get added to the meta.data by clustifyr
res <- res %>%
RunUMAP(
assay = assay,
dims = dims,
reduction.name = str_c(prefix, "umap"),
reduction.key = str_c(prefix, "UMAP_")
)
res
}
# Subset Seurat objects for plotting
subset_sobj <- function(sobj_in, name, type_vec, include_list, type_column = "cell_type1", subtype_column = "cell_type2",
min_cells = 15, orig_idents = c("_1" = "CD45_neg", "_2" = "CD45_pos"),
type_filt = c("DC" = "CD45_pos", "LEC" = "CD45_neg", "fibroblast" = "CD45_neg"),
cap_names = T, ...) {
res <- sobj_in
type_in <- type_vec[[name]]
include_types <- include_list[[name]]
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
cell_type = !!sym(type_column),
subtype = !!sym(subtype_column)
) %>%
column_to_rownames("cell_id")
# Set orig.ident based on cell_id suffix
if (!is.null(orig_idents)) {
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
orig.ident = str_extract(cell_id, "_[0-9]+$"),
orig.ident = orig_idents[orig.ident]
) %>%
column_to_rownames("cell_id")
}
# Filter cells based on orig.ident
if (!is.null(type_filt)) {
res <- res %>%
subset(subset = orig.ident == type_filt[type_in])
}
# Filter cells based on input cell type
res <- res %>%
subset(subset = cell_type %in% c(type_in, include_types))
if (!type_in %in% pull(res@meta.data, cell_type)) {
stop(str_c("ERROR: Cell type not present after filtering for ", CD45_status))
}
# Filter cells based on number of cells per subtype
if (!is.null(min_cells)) {
keep_cells <- res@meta.data %>%
rownames_to_column("cell_id") %>%
group_by(subtype) %>%
filter(n_distinct(cell_id) > min_cells) %>%
pull(cell_id)
res <- res %>%
subset(cells = keep_cells)
}
# Re-run UMAP
res <- res %>%
RunPCA() %>%
RunUMAP(dims = 1:40) %>%
AddMetaData(FetchData(., c("PC_1", "PC_2"))) %>%
AddMetaData(FetchData(., c("UMAP_1", "UMAP_2")))
# Capitalize subtypes
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(subtype = str_to_title_v2(subtype)) %>%
column_to_rownames("cell_id")
res
}
# Re-cluster and run clustify
run_clustifyr <- function(sobj_in, name, type_vec, resolution = 1.4, type_column = "cell_type",
subtype_column = "subtype", clust_column = "seurat_clusters") {
res <- sobj_in
type_in <- type_vec[[name]]
ref <- params$sobj_refs[[type_in]]
if (!is.null(ref)) {
load(ref)
ref <- ref %>%
basename() %>%
str_remove("\\.rda$")
res <- sobj_in %>%
FindNeighbors(
reduction = "pca",
dims = 1:30
) %>%
FindClusters(
resolution = resolution,
verbose = F
) %>%
clustify(
ref_mat = eval(parse(text = ref)),
cluster_col = clust_column
)
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
!!sym(subtype_column) := if_else(!!sym(type_column) == type_in, type, !!sym(type_column)),
!!sym(subtype_column) := str_to_title_v2(!!sym(subtype_column))
) %>%
rename(
UMAP_1 = UMAP_1.x,
UMAP_2 = UMAP_2.x
) %>%
column_to_rownames("cell_id")
}
res
}
# Filter list of Seurat objects for patient, normalize and merge objects
merge_sobj <- function(sobj_list, sample_order = NULL) {
res <- merge(
x = sobj_list[[1]],
y = sobj_list[2:length(sobj_list)],
add.cell.ids = names(sobj_list)
) %>%
ScaleData(assay = "RNA") %>%
ScaleData(assay = "adt") %>%
FindVariableFeatures(assay = "RNA")
# Set sample order
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_ids") %>%
mutate(orig.ident = fct_relevel(orig.ident, sample_order)) %>%
column_to_rownames("cell_ids")
res
}
# Fit gaussian mixture model for given signal
fit_GMM <- function(sobj_in, data_column = "adt_ovalbumin") {
# Fit GMM for OVA signal
data_df <- sobj_in %>%
FetchData(data_column)
mixmdl <- data_df %>%
pull(data_column) %>%
normalmixEM()
# New column names
ova_names <- c("Low", "High")
comp_names <- c("comp.1", "comp.2")
if (mixmdl$mu[1] > mixmdl$mu[2]) {
ova_names <- rev(ova_names)
}
names(comp_names) <- ova_names
names(mixmdl$mu) <- ova_names
names(mixmdl$sigma) <- ova_names
names(mixmdl$lambda) <- ova_names
# Divide into OVA groups
res <- data.frame(
cell_id = rownames(data_df),
data = data_df[, data_column],
mixmdl$posterior
) %>%
dplyr::rename(!!sym(data_column) := data) %>%
rename(all_of(comp_names)) %>%
mutate(GMM_grp = if_else(Low > 0.5, "Low", "High")) %>%
column_to_rownames("cell_id")
res <- list(
res = res,
mu = mixmdl$mu,
sigma = mixmdl$sigma,
lambda = mixmdl$lambda
)
res
}
# Add distribution of GMM component to plot
add_stat_fun <- function(gmm_in, cols_in, key) {
# dnorm provides density for the normal distribution given the mean and
# standard deviation. Lambda is used to adjust for the mixture composition.
# mu: Mean of component
# sig: Standard deviation of component
# lam: Lambda of component (mixture weight)
plot_mix_comps <- function(x, mu, sigma, lam) {
lam * dnorm(x, mu, sigma)
}
stat_function(
geom = "line",
fun = plot_mix_comps,
args = list(gmm_in$mu[key], gmm_in$sigma[key], gmm_in$lambda[key]),
color = cols_in[key],
lwd = 1
)
}
# Overlay feature data on UMAP or tSNE
# Cannot change number of columns when using FeaturePlot with split.by
plot_features <- function(sobj_in, x = "UMAP_1", y = "UMAP_2", feature, data_slot = "data",
split_id = NULL, pt_size = 0.25, pt_outline = NULL, plot_cols = NULL,
feat_levels = NULL, split_levels = NULL, min_pct = NULL, max_pct = NULL,
calc_cor = F, lm_line = F, lab_size = 3.7, lab_pos = c(0.8, 0.9), ...) {
# Format imput data
counts <- sobj_in
if ("Seurat" %in% class(sobj_in)) {
vars <- c(x, y, feature)
if (!is.null(split_id)) {
vars <- c(vars, split_id)
}
counts <- sobj_in %>%
FetchData(vars = unique(vars), slot = data_slot) %>%
as_tibble(rownames = "cell_ids")
}
# Rename features
if (!is.null(names(feature))) {
counts <- counts %>%
rename(!!!syms(feature))
feature <- names(feature)
}
if (!is.null(names(x))) {
counts <- counts %>%
rename(!!!syms(x))
x <- names(x)
}
if (!is.null(names(y))) {
counts <- counts %>%
rename(!!!syms(y))
y <- names(y)
}
# Set min and max values for feature
if (!is.null(min_pct) || !is.null(max_pct)) {
counts <- counts %>%
mutate(
pct_rank = percent_rank(!!sym(feature)),
max_val = ifelse(pct_rank > max_pct, !!sym(feature), NA),
max_val = min(max_val, na.rm = T),
min_val = ifelse(pct_rank < min_pct, !!sym(feature), NA),
min_val = max(min_val, na.rm = T),
!!sym(feature) := if_else(!!sym(feature) > max_val, max_val, !!sym(feature)),
!!sym(feature) := if_else(!!sym(feature) < min_val, min_val, !!sym(feature))
)
}
# Set feature order
if (!is.null(feat_levels)) {
counts <- counts %>%
mutate(!!sym(feature) := fct_relevel(!!sym(feature), feat_levels))
}
# Set facet order
if (!is.null(split_id) && length(split_id) == 1) {
counts <- counts %>%
mutate(split_id = !!sym(split_id))
if (!is.null(split_levels)) {
counts <- counts %>%
mutate(split_id = fct_relevel(split_id, split_levels))
}
}
# Calculate correlation
if (calc_cor) {
if (!is.null(split_id)) {
counts <- counts %>%
group_by(split_id)
}
counts <- counts %>%
mutate(
cor_lab = cor(!!sym(x), !!sym(y)),
cor_lab = round(cor_lab, digits = 2),
cor_lab = str_c("r = ", cor_lab),
min_x = min(!!sym(x)),
max_x = max(!!sym(x)),
min_y = min(!!sym(y)),
max_y = max(!!sym(y)),
lab_x = (max_x - min_x) * lab_pos[1] + min_x,
lab_y = (max_y - min_y) * lab_pos[1] + min_y
)
}
# Create scatter plot
# To add outline for each cluster create separate layers
res <- counts %>%
arrange(!!sym(feature))
if (!is.null(pt_outline)) {
if (!is.numeric(counts[[feature]])) {
res <- res %>%
ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature), fill = !!sym(feature)))
feats <- counts[[feature]] %>%
unique()
if (!is.null(feat_levels)) {
feats <- feat_levels[feat_levels %in% feats]
}
for (feat in feats) {
f_counts <- counts %>%
filter(!!sym(feature) == feat)
res <- res +
geom_point(data = f_counts, aes(fill = !!sym(feature)), size = pt_outline, color = "black", show.legend = F) +
geom_point(data = f_counts, size = pt_size)
}
} else {
res <- res %>%
ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature))) +
geom_point(aes(fill = !!sym(feature)), size = pt_outline, color = "black", show.legend = F) +
geom_point(size = pt_size)
}
} else {
res <- res %>%
ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature))) +
geom_point(size = pt_size)
}
# Add regression line
if (lm_line) {
res <- res +
geom_smooth(method = "lm", se = F, color = "black", size = 0.5, linetype = 2)
}
# Add correlation coefficient label
if (calc_cor) {
res <- res +
geom_text(
aes(x = lab_x, lab_y, label = cor_lab),
color = "black",
size = lab_size,
check_overlap = T,
show.legend = F
)
}
# Set feature colors
if (!is.null(plot_cols)) {
if (is.numeric(counts[[feature]])) {
res <- res +
scale_color_gradient(low = plot_cols[1], high = plot_cols[2])
} else {
res <- res +
scale_color_manual(values = plot_cols) +
scale_fill_manual(values = plot_cols)
}
}
# Split plot into facets
if (!is.null(split_id)) {
if (length(split_id) == 1) {
res <- res +
facet_wrap(~ split_id, ...)
} else if (length(split_id) == 2) {
eq <- str_c(split_id[1], " ~ ", split_id[2])
res <- res +
facet_grid(as.formula(eq), ...)
}
}
res
}
# Run gprofiler
run_gprofiler <- function(gene_list, genome = NULL, gmt_id = NULL, p_max = params$p_max_GO,
order_query = params$order_query, dbases = c("GO:BP", "GO:MF", "KEGG"), ...) {
# Check for empty gene list
if (is_empty(gene_list)) {
return(as_tibble(NULL))
}
# Check arguments
if (is.null(genome) && is.null(gmt_id)) {
stop("ERROR: Must specifiy genome or gmt_id")
}
# Retrieve organism name for gProfileR
if (!is.null(genome)){
genomes <- list(
GRCm = "mmusculus",
GRCh = "hsapiens",
BDGP = "dmelanogaster"
)
org <- genome %>%
str_remove("[0-9]+$") %>%
genomes[[.]]
}
if (!is.null(gmt_id)) {
org <- gmt_id
dbases <- NULL
}
# Run gProfileR
res <- gene_list %>%
gost(
organism = org,
sources = dbases,
domain_scope = "annotated",
significant = T,
user_threshold = p_max,
ordered_query = order_query,
...
)
# Format and sort output data.frame
res <- as_tibble(res$result)
if (!is_empty(res)) {
res <- res %>%
arrange(source, p_value)
}
res
}
# Create GO bubble plot
create_bubbles <- function(GO_df, plot_colors = theme_cols[c(1:2, 4, 9)],
n_terms = 15) {
# Check for empty inputs
if (is_empty(GO_df) || nrow(GO_df) == 0) {
res <- ggplot() +
geom_blank()
return(res)
}
# Shorten GO terms and database names
GO_data <- GO_df %>%
mutate(
term_id = str_remove(term_id, "(GO|KEGG):"),
term_id = str_c(term_id, " ", term_name),
term_id = str_to_lower(term_id),
term_id = str_trunc(term_id, 40, "right"),
source = fct_recode(
source,
"Biological\nProcess" = "GO:BP",
"Cellular\nComponent" = "GO:CC",
"Molecular\nFunction" = "GO:MF",
"KEGG" = "KEGG"
)
)
# Reorder database names
plot_levels <- c(
"Biological\nProcess",
"Cellular\nComponent",
"Molecular\nFunction",
"KEGG"
)
GO_data <- GO_data %>%
mutate(source = fct_relevel(source, plot_levels))
# Extract top terms for each database
top_GO <- GO_data %>%
group_by(source) %>%
arrange(p_value) %>%
dplyr::slice(1:n_terms) %>%
ungroup()
# Create bubble plots
res <- GO_data %>%
ggplot(aes(1.25, -log10(p_value), size = intersection_size)) +
geom_point(color = plot_colors, alpha = 0.5, show.legend = T) +
geom_text_repel(
aes(2, -log10(p_value), label = term_id),
data = top_GO,
size = 2.3,
direction = "y",
hjust = 0,
segment.size = NA
) +
xlim(1, 8) +
labs(y = "-log10(p-value)") +
theme_info +
theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
) +
facet_wrap(~ source, scales = "free", nrow = 1)
res
}
# Plot percentage of cells in given groups
plot_cell_count <- function(sobj_in, group_id, split_id = NULL, group_order = NULL,
fill_id, plot_colors = theme_cols,
x_lab = "Cell type", y_lab = "Fraction of cells",
bar_pos = "fill", order_count = T, bar_line = 0, ...) {
res <- sobj_in@meta.data %>%
rownames_to_column("cell_ids") %>%
mutate(
group_id := !!sym(group_id),
fill_id := !!sym(fill_id)
)
if (!is.null(group_order)) {
res <- res %>%
mutate(group_id = fct_relevel(group_id, group_order))
}
if (!is.null(split_id)) {
res <- res %>%
mutate(split_id := !!sym(split_id))
}
if (order_count) {
res <- res %>%
mutate(fill_id = fct_reorder(fill_id, cell_ids, n_distinct))
}
res <- res %>%
ggplot(aes(group_id, fill = fill_id)) +
geom_bar(position = bar_pos, size = bar_line, color = "black") +
scale_fill_manual(values = plot_colors) +
labs(x = x_lab, y = y_lab) +
theme_info
if (!is.null(split_id)) {
res <- res +
facet_wrap(~ split_id, ...)
}
res
}
# Find markers with presto
find_markers <- function(input_sobj, group_column = NULL, uniq = params$uniq_markers,
FC_min = params$FC_min, auc_min = params$auc_min, p_max = params$p_max_markers,
pct_in_min = params$pct_in_min, pct_out_max = params$pct_out_max, ...) {
res <- input_sobj %>%
wilcoxauc(group_by = group_column) %>%
as_tibble() %>%
filter(
padj < p_max,
logFC > FC_min,
auc > auc_min,
pct_in > pct_in_min,
pct_out < pct_out_max
) %>%
arrange(desc(logFC))
if (uniq) {
res <- res %>%
group_by(feature) %>%
filter(n() == 1) %>%
ungroup()
}
res
}
# Find cluster markers for each separate cell type
find_group_markers <- function(input_grp, input_sobj, grp_column, clust_column) {
res <- input_sobj %>%
subset(!!sym(grp_column) == input_grp)
clusts <- res@meta.data %>%
pull(clust_column)
if (n_distinct(clusts) < 2) {
return(NULL)
}
res <- res %>%
find_markers(group_column = clust_column) %>%
mutate(cell_type = input_grp)
res
}
# Create reference UMAP for comparisons
create_ref_umap <- function(input_sobj, pt_mtplyr = 1, color_guide, ...) {
res <- input_sobj %>%
plot_features(
pt_size = 0.1 * pt_mtplyr,
pt_outline = 0.4,
...
) +
guides(color = color_guide) +
blank_theme +
theme(
legend.position = "top",
legend.title = element_blank(),
legend.text = element_text(size = 10)
)
res
}
# Create UMAPs showing marker gene signal
create_marker_umaps <- function(input_sobj, input_markers, umap_col, add_outline = NULL, pt_mtplyr = 1) {
pt_size <- 0.25 * pt_mtplyr
res <- input_markers %>%
map(~ {
input_sobj %>%
plot_features(
feature = .x,
plot_cols = c("#fafafa", umap_col),
pt_outline = add_outline,
pt_size = pt_size
) +
ggtitle(.x) +
blank_theme +
theme(
plot.title = element_text(size = 13),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 8),
legend.key.height = unit(0.1, "cm"),
legend.key.width = unit(0.3, "cm"),
axis.title.y = element_text(size = 13, color = "white"),
axis.text.y = element_text(size = 8, color = "white")
)
})
res
}
# Create boxplots showing marker gene signal
create_marker_boxes <- function(input_sobj, input_markers, clust_column, box_cols,
group = NULL, include_legend = F, all_boxes = F,
all_violins = F, order_boxes = T, n_boxes = 10,
n_rows = 2, pt_mtplyr = 1, exclude_clust = NULL, ...) {
# Retrieve and format data for boxplots
input_markers <- input_markers %>%
head(n_boxes)
box_data <- input_sobj %>%
FetchData(c(clust_column, input_markers)) %>%
as_tibble(rownames = "cell_id") %>%
filter(!(!!sym(clust_column) %in% exclude_clust)) %>%
mutate(grp = str_remove(!!sym(clust_column), "^[a-zA-Z0-9_]+-")) # This is specific for Shannon's subtype clusters
input_markers <- input_markers %>%
str_trunc(9)
# Filter based on input group
if (!is.null(group)) {
box_data <- box_data %>%
filter(grp == group)
}
# Format data for plots
box_data <- box_data %>%
pivot_longer(cols = c(-cell_id, -grp, -!!sym(clust_column)), names_to = "key", values_to = "Counts") %>%
mutate(
!!sym(clust_column) := fct_relevel(!!sym(clust_column), names(box_cols)),
key = str_trunc(key, width = 9, side = "right"),
key = fct_relevel(key, input_markers)
)
# Order boxes by mean signal
if (order_boxes) {
box_data <- box_data %>%
mutate(!!sym(clust_column) := fct_reorder(!!sym(clust_column), Counts, mean, .desc = T))
}
n_clust <- box_data %>%
pull(clust_column) %>%
n_distinct()
# Create plots
n_cols <- ceiling(n_boxes / n_rows)
res <- box_data %>%
ggplot(aes(!!sym(clust_column), Counts, color = !!sym(clust_column))) +
facet_wrap(~ key, ncol = n_cols) +
scale_color_manual(values = box_cols) +
theme_info +
theme(
panel.spacing.x = unit(0.7, "cm"),
strip.background = element_blank(),
strip.text = element_text(size = 13),
legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_text(size = 13),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 8),
axis.ticks.x = element_blank(),
axis.line.x = element_blank()
)
# Adjust output plot type
if (n_clust > 6 || all_boxes) {
res <- res +
geom_boxplot(aes(fill = !!sym(clust_column), color = !!sym(clust_column)), size = 0, outlier.color = "#f0f0f0", outlier.size = 0.25) +
stat_summary(fun = "median", geom = "point", shape = 22, size = 1, fill = "white") +
scale_fill_manual(values = box_cols) +
theme(...)
} else if (all_violins) {
res <- res +
geom_violin(aes(fill = !!sym(clust_column)), size = 0.2) +
stat_summary(fun = "median", geom = "point", shape = 22, size = 1, fill = "white") +
scale_fill_manual(values = box_cols) +
scale_color_manual(values = box_cols) +
theme(...)
} else {
pt_size <- 0.3 * pt_mtplyr
res <- res +
geom_quasirandom(size = pt_size) +
theme(...)
}
# Add legend
if (include_legend) {
res <- res +
guides(color = col_guide) +
theme(legend.position = "top")
}
# Add blank space for missing facets
n_keys <- n_distinct(box_data$key)
if (n_keys <= n_cols && n_rows > 1) {
n_keys <- if_else(n_keys == 1, 2, as.double(n_keys))
n_cols <- floor(n_cols / n_keys)
res <- res %>%
plot_grid(
ncol = n_cols,
nrow = 2
)
}
res
}
# Create figure summarizing marker genes
create_marker_fig <- function(input_sobj, input_markers, input_GO, clust_column,
input_umap, umap_color, fig_heights = c(0.46, 0.3, 0.3),
GO_genome = params$genome, box_colors, n_boxes = 10,
umap_outline = NULL, umap_mtplyr = 1, xlsx_name = NULL,
sheet_name = NULL, ...) {
blank_umap <- ggplot() +
geom_blank() +
theme_void()
marks_umap <- blank_umap
marks_boxes <- blank_umap
GO_bubbles <- blank_umap
# Create UMAPs showing marker gene signal
if (nrow(input_markers) > 0) {
top_marks <- input_markers$feature %>%
head(n_boxes)
clust_legend <- get_legend(input_umap)
input_umap <- input_umap +
theme(legend.position = "none")
marks_umap <- input_sobj %>%
create_marker_umaps(
input_markers = head(top_marks, 7),
umap_col = umap_color,
add_outline = umap_outline,
pt_mtplyr = umap_mtplyr
) %>%
append(list(input_umap), .)
marks_umap <- plot_grid(
plotlist = marks_umap,
ncol = 4,
nrow = 2,
align = "vh",
axis = "trbl"
)
marks_umap <- plot_grid(
clust_legend, marks_umap,
rel_heights = c(0.2, 0.9),
nrow = 2
)
# Create boxplots showing marker gene signal
marks_boxes <- input_sobj %>%
create_marker_boxes(
input_markers = top_marks,
clust_column = clust_column,
box_cols = box_colors,
n_boxes = n_boxes,
plot.margin = unit(c(0.8, 0.2, 0.2, 0.2), "cm"),
...
)
# Create GO term plots
if (nrow(input_GO) > 0) {
GO_bubbles <- input_GO %>%
create_bubbles(plot_colors = umap_color) +
theme(
plot.margin = unit(c(0.8, 0.2, 0.2, 0.2), "cm"),
strip.background = element_blank(),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 13),
axis.text.y = element_text(size = 8),
axis.line.x = element_blank(),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 8)
)
# Write GO terms to excel file
if (!is.null(xlsx_name)) {
input_GO %>%
dplyr::select(
term_name, term_id,
source, effective_domain_size,
query_size, intersection_size,
p_value, significant
) %>%
arrange(source, p_value) %>%
write.xlsx(
file = str_c(xlsx_name, "_GO.xlsx"),
sheetName = sheet_name,
append = T
)
}
}
# Write markers to excel file
if (!is.null(xlsx_name)) {
input_markers %>%
write.xlsx(
file = str_c(xlsx_name, "_markers.xlsx"),
sheetName = sheet_name,
append = T
)
}
}
# Create final figure
res <- plot_grid(
marks_umap, marks_boxes, GO_bubbles,
rel_heights = fig_heights,
ncol = 1,
align = "v",
axis = "rl"
)
if (nrow(input_markers) < n_boxes) {
res <- plot_grid(
marks_umap, marks_boxes, GO_bubbles,
rel_heights = fig_heights,
ncol = 1
)
}
res
}
# Filter clusters and set cluster order
set_cluster_order <- function(input_cols, input_marks, n_cutoff = 5) {
input_marks <- input_marks %>%
group_by(group) %>%
filter(n() >= n_cutoff) %>%
ungroup()
marks <- unique(input_marks$group)
res <- names(input_cols)
res <- res[res %in% marks]
res
}
# Create v1 panel for marker genes
create_marker_panel_v1 <- function(input_sobj, input_cols, input_umap = NULL, clust_column, order_boxes = T,
color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)),
uniq = params$uniq_GO, umap_mtplyr = 6, xlsx_name = NULL, exclude_clust = NULL,
...) {
# Set point size
umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
ref_mtplyr <- if_else(umap_mtplyr == 1, umap_mtplyr, umap_mtplyr * 2.5)
# Find marker genes
markers <- input_sobj %>%
find_markers(group_column = clust_column)
# Find GO terms
GO_df <- markers
if (nrow(markers) > 0) {
GO_df <- markers %>%
group_by(group) %>%
do({
arrange(., desc(logFC)) %>%
pull(feature) %>%
run_gprofiler(genome = params$genome)
}) %>%
ungroup()
}
if (uniq && nrow(GO_df) > 0) {
GO_df <- GO_df %>%
group_by(term_id) %>%
filter(n() == 1) %>%
ungroup()
}
# Set cluster order based on order of input_cols
fig_clusters <- input_cols %>%
set_cluster_order(markers)
fig_clusters <- fig_clusters[!fig_clusters %in% exclude_clust]
# Create figures
for (i in seq_along(fig_clusters)) {
cat("\n#### ", fig_clusters[i], "\n", sep = "")
# Filter markers and GO terms
clust <- fig_clusters[i]
fig_marks <- markers %>%
filter(group == clust)
fig_GO <- GO_df %>%
filter(group == clust)
# Create reference umap
ref_umap <- input_umap
umap_col <- input_cols[clust]
if (is.null(input_umap)) {
umap_levels <- input_cols[names(input_cols) != clust]
umap_levels <- names(c(umap_levels, umap_col))
ref_umap <- input_sobj %>%
create_ref_umap(
feature = clust_column,
plot_cols = input_cols,
feat_levels = umap_levels,
pt_mtplyr = ref_mtplyr,
color_guide = color_guide
)
}
# Create panel
marker_fig <- input_sobj %>%
create_marker_fig(
input_markers = fig_marks,
input_GO = fig_GO,
clust_column = clust_column,
input_umap = ref_umap,
umap_color = umap_col,
box_colors = input_cols,
order_boxes = order_boxes,
umap_mtplyr = umap_mtplyr,
xlsx_name = xlsx_name,
sheet_name = clust,
exclude_clust = exclude_clust,
...
)
cat(nrow(fig_marks), "marker genes were identified,", nrow(fig_GO), "GO terms were identified.")
print(marker_fig)
cat("\n\n---\n\n<br>\n\n<br>\n\n")
}
}
# Create v2 panel that splits plots into groups
create_marker_panel_v2 <- function(input_sobj, input_markers, input_cols, grp_column, clust_column,
color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)),
uniq_GO = params$uniq_GO, umap_mtplyr = 6, xlsx_name = NULL, clust_regex = "^[a-zA-Z0-9_]+-",
...) {
# Set point size
umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
ref_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr * 2.5, 1)
# Figure colors and order
fig_clusters <- input_cols %>%
set_cluster_order(input_markers)
# Find GO terms
GO_df <- input_markers %>%
group_by(group) %>%
do({
arrange(., desc(logFC)) %>%
pull(feature) %>%
run_gprofiler(genome = params$genome)
}) %>%
ungroup()
if (uniq_GO && nrow(GO_df) > 0) {
GO_df <- GO_df %>%
group_by(term_id) %>%
filter(n() == 1) %>%
ungroup()
}
# Create figures
for (i in seq_along(fig_clusters)) {
cat("\n#### ", fig_clusters[i], "\n", sep = "")
# Filter markers and GO terms
clust <- fig_clusters[i]
fig_marks <- input_markers %>%
filter(group == clust)
fig_GO <- GO_df %>%
filter(group == clust)
# Set colors
umap_col <- input_cols[clust]
group <- clust %>%
str_remove(clust_regex)
grp_regex <- str_c("-", group, "$") %>%
str_replace("\\+", "\\\\+") # include this to escape "+" in names
fig_cols <- input_cols[grepl(grp_regex, names(input_cols))]
fig_cols <- c( "Other" = "#fafafa", fig_cols)
ref_cols <- fig_cols[names(fig_cols) != clust]
ref_cols <- c(ref_cols, umap_col)
# Create reference UMAP
ref_umap <- input_sobj %>%
FetchData(c("UMAP_1", "UMAP_2", grp_column, clust_column)) %>%
as_tibble(rownames = "cell_id") %>%
mutate(!!sym(clust_column) := if_else(
!!sym(grp_column) != group,
"Other",
!!sym(clust_column)
)) %>%
create_ref_umap(
feature = clust_column,
plot_cols = ref_cols,
feat_levels = names(ref_cols),
pt_mtplyr = ref_mtplyr,
color_guide = color_guide
)
# Create panel
marker_fig <- input_sobj %>%
create_marker_fig(
input_markers = fig_marks,
input_GO = fig_GO,
clust_column = clust_column,
input_umap = ref_umap,
umap_color = umap_col,
box_colors = fig_cols,
group = group,
umap_mtplyr = umap_mtplyr,
xlsx_name = xlsx_name,
sheet_name = clust,
...
)
cat(nrow(fig_marks), "marker genes were identified.", nrow(fig_GO), "GO terms were identified.")
print(marker_fig)
cat("\n\n---\n\n<br>\n\n<br>\n\n")
}
}
# Default chunk options
knitr::opts_chunk$set(message = F, warning = F)
# Load packages
R_packages <- c(
"tidyverse", "Seurat",
"gprofiler2", "knitr",
"cowplot", "ggbeeswarm",
"ggrepel", "RColorBrewer",
"xlsx", "colorblindr",
"ggforce", "broom",
"mixtools", "clustifyr",
"boot", "scales",
"patchwork", "ComplexHeatmap",
"devtools", "broom",
"presto"
)
for (package in R_packages) {
library(package, character.only = T)
}
# ggplot2 themes
theme_info <- theme_cowplot() +
theme(
plot.title = element_text(face = "plain", size = 20),
strip.background = element_blank(),
strip.text = element_text(face = "plain")
)
umap_theme <- theme_info +
theme(
axis.text = element_blank(),
axis.ticks = element_blank()
)
blank_theme <- umap_theme +
theme(
axis.line = element_blank(),
axis.title = element_blank()
)
# Legend guides
col_guide <- guide_legend(override.aes = list(size = 3.5, shape = 16))
outline_guide <- guide_legend(override.aes = list(
size = 3.5,
shape = 21,
color = "black",
stroke = 0.25
))
# Base color palettes
base_cols <- c(
"#225ea8", # blue
"#e31a1c", # red
"#238443", # green
"#ec7014", # orange
"#6a51a3", # purple
"#c51b7d", # pink
"#8c510a", # brown
"#217D87", # teal, darken("#41b6c4", 0.3)
"#F0E442", # yellow, palette_OkabeIto[4]
"#000000" # black
)
base_cols_paired <- base_cols %>%
map(~ {
.x %>%
lighten(0.25) %>%
desaturate(0.2) %>%
c(.x)
})
names(base_cols_paired) <- base_cols
base_cols <- base_cols %>%
lighten(0.25) %>%
desaturate(0.2) %>%
c(base_cols, .)
# Okabe Ito color palettes
ito_cols <- c(
palette_OkabeIto[1:4], "#d7301f",
palette_OkabeIto[5:6], "#6a51a3",
palette_OkabeIto[7:8]
)
ito_cols <- ito_cols %>%
darken(0.4) %>%
c(ito_cols, ., "#000000")
# Set color palette
theme_cols <- ito_cols# Add one to variable
plus_one <- function(x, n = 1) {
cmd <- str_c(x, " <<- ", x, " + ", n)
eval(parse(text = cmd))
eval(parse(text = x))
}
# Add arrows to axis
add_arrow_axis <- function(gg_in, fract = 0.1, ...) {
get_line_coords <- function(range_in, fract) {
mn <- range_in[1]
mx <- range_in[2]
dif <- mx - mn
res <- c(mn - (dif * 0.05), mn + (dif * fract))
res
}
x_coords <- ggplot_build(gg_in)$layout$panel_scales_x[[1]]$range$range %>%
get_line_coords(fract = fract)
y_coords <- ggplot_build(gg_in)$layout$panel_scales_y[[1]]$range$range %>%
get_line_coords(fract = fract)
res <- gg_in +
geom_segment(
x = x_coords[1],
xend = x_coords[2],
y = y_coords[1],
yend = y_coords[1],
size = 0.25,
color = "black",
linejoin = "bevel",
arrow = arrow(ends = "last", type = "open", length = unit(0.02, "npc")),
...
) +
geom_segment(
y = y_coords[1],
yend = y_coords[2],
x = x_coords[1],
xend = x_coords[1],
size = 0.25,
color = "black",
linejoin = "bevel",
arrow = arrow(ends = "last", type = "open", length = unit(0.02, "npc")),
...
) +
theme(axis.title = element_text(hjust = 0, size = 10))
res
}
# Set equal x-axis scales
equalize_x <- function(gg_list_in, log_tran = T, ...) {
set_lims <- function(gg_in, min_x, max_x, log_tran, ...) {
res <- gg_in +
coord_cartesian(xlim = c(min_x, max_x))
if (log_tran) {
res <- res +
scale_x_log10(labels = trans_format("log10", math_format(10^.x)), ...)
}
res
}
gg_ranges <- gg_list_in %>%
map(~ ggplot_build(.x)$layout$panel_scales_x[[1]]$range$range)
min_val <- gg_ranges %>%
map_dbl(~ .x[1]) %>%
min()
max_val <- gg_ranges %>%
map_dbl(~ .x[2]) %>%
max()
res <- gg_list_in %>%
map(
set_lims,
min_x = min_val,
max_x = max_val,
log_tran = log_tran,
...
)
res
}
# Function to create figure panel
create_fig3 <- function(sobj_in, cols_in, subtype_column = "subtype", data_slot = "counts",
box_counts = c("Relative ova signal" = "ova_fc"), umap_counts = c("ova counts" = "adt_ovalbumin"),
plot_title = NULL, arrow_axis = F, pt_size = 0.1, pt_outline = 0.4, pt_size_2 = 0.3, pt_outline_2 = 0.5,
umap_cell_count = F, box_cell_count = T, control_types = c("B Cell", "T Cell"), ...) {
box_column <- umap_column <- "cell_type"
box_cols <- umap_cols <- cols_in
# Fetch plotting data
data_df <- sobj_in %>%
FetchData(c(subtype_column, box_counts, umap_counts, "UMAP_1", "UMAP_2"), slot = data_slot) %>%
as_tibble(rownames = "cell_id")
# Add pseudo count
if (0 %in% pull(data_df, box_counts)) {
data_df <- data_df %>%
mutate(
pseudo = ifelse(!!sym(box_counts) > 0, !!sym(box_counts), NA),
pseudo = min(pseudo, na.rm = T) * 0.5,
!!sym(box_counts) := !!sym(box_counts) + pseudo
)
}
if (!is.null(names(box_counts))) {
data_df <- data_df %>%
rename(!!box_counts)
box_counts <- names(box_counts)
}
if (!is.null(names(umap_counts))) {
data_df <- data_df %>%
rename(!!umap_counts)
umap_counts <- names(umap_counts)
}
# Set subtype order
# Move select cell types to front of order
data_df <- data_df %>%
mutate(
cell_type = !!sym(subtype_column),
cell_type = fct_reorder(cell_type, !!sym(box_counts), median)
)
type_order <- levels(data_df$cell_type)
if (!is.null(control_types)) {
control_types <- control_types[control_types %in% type_order]
type_order <- type_order[!type_order %in% control_types]
type_order <- c(control_types, type_order)
}
# Count cells for each subtype
data_df <- data_df %>%
group_by(cell_type) %>%
mutate(cell_count = n_distinct(cell_id)) %>%
ungroup() %>%
mutate(cell_type = fct_relevel(cell_type, type_order)) %>%
arrange(cell_type) %>%
mutate(
cell_count = str_c(cell_type, "\n(n = ", cell_count, ")"),
cell_count = fct_inorder(cell_count)
)
# Set cell type colors
names(type_order) <- levels(data_df$cell_count)
cols_df <- tibble(
cell_type = type_order,
cell_count = names(type_order)
)
cols_df <- cols_df %>%
mutate(color = cols_in[cell_type])
# Subtype UMAP
if (umap_cell_count) {
umap_column <- "cell_count"
umap_cols <- setNames(cols_df$color, cols_df$cell_count)
}
umap <- data_df %>%
plot_features(
feature = umap_column,
pt_size = pt_size,
pt_outline = pt_outline,
plot_cols = umap_cols,
feat_levels = rev(names(umap_cols))
) +
guides(color = guide_legend(override.aes = list(size = 3.5))) +
ggtitle(plot_title) +
blank_theme +
theme(
plot.title = element_text(size = 12),
legend.position = "none"
) +
theme(...)
if (arrow_axis) {
umap <- add_arrow_axis(umap)
}
# OVA UMAP
if (!is.null(umap_counts)) {
ova_umap <- data_df %>%
plot_features(
feature = umap_counts,
plot_cols = c("#fafafa", "#d7301f"),
pt_size = pt_size_2,
pt_outline = pt_outline_2,
min_pct = 0.01,
max_pct = 0.99
) +
blank_theme +
theme(
plot.title = element_text(size = 10, hjust = 0.5),
legend.position = "right",
legend.key.width = unit(0.15, "cm"),
legend.key.height = unit(0.30, "cm"),
legend.title = element_text(size = 10),
legend.text = element_text(size = 10)
)
if (arrow_axis) {
ova_umap <- add_arrow_axis(ova_umap)
}
}
# OVA boxes
if (box_cell_count) {
box_column <- "cell_count"
box_cols <- setNames(cols_df$color, cols_df$cell_count)
}
boxes <- data_df %>%
ggplot(aes(!!sym(box_counts), !!sym(box_column), fill = !!sym(box_column))) +
geom_violin(size = 0.3, draw_quantiles = c(0.25, 0.75), alpha = 0.75) +
stat_summary(geom = "point", color = "black", fun = median) +
scale_color_manual(values = box_cols) +
scale_fill_manual(values = box_cols) +
theme_minimal_vgrid() +
theme(
legend.position = "none",
axis.title.y = element_blank(),
axis.title = element_text(size = 10),
axis.text = element_text(size = 10),
axis.ticks.x = element_line(size = 0.1),
panel.grid.major.x = element_line(size = 0.1)
)
res <- list(umap, boxes)
if (!is.null(umap_counts)) {
res <- append(res, list(ova_umap))
}
names(res) <- rep(plot_title, length(res))
res
}
# Plot correlation
plot_corr <- function(sobj_in, x, y, feat, data_slot, cols_in, plot_title, ...) {
res <- sobj_in %>%
FetchData(c(x, y, feat), slot = data_slot) %>%
mutate(
!!sym(x) := log10(!!sym(x)),
!!sym(y) := log10(!!sym(y))
) %>%
filter(
!!sym(x) != -Inf,
!!sym(y) != -Inf
) %>%
plot_features(
x = x,
y = y,
feature = feat,
data_slot = "counts",
plot_cols = cols_in,
lab_pos = c(0.9, 1),
lab_size = 5,
calc_cor = T,
lm_line = T,
...
) +
ggtitle(plot_title) +
guides(color = guide_legend(override.aes = list(size = 3.5))) +
theme_info +
theme(legend.title = element_blank())
res
}
# Calculate pairwise p-values for gg objects
calc_p_vals <- function(gg_in, sample_name, data_column, type_column, log_tran = T) {
# Pull data from gg object
gg_data <- gg_in$data
# Log transform
if (log_tran) {
gg_data <- gg_data %>%
mutate(!!sym(data_column) := log10(!!sym(data_column)))
}
# Calculate median
gg_stats <- gg_data %>%
group_by(!!sym(type_column)) %>%
summarize(med = median(!!sym(data_column)))
# Run wilcox test
gg_counts <- gg_data %>%
pull(data_column)
gg_groups <- gg_data %>%
pull(type_column)
res <- gg_counts %>%
pairwise.wilcox.test(
g = gg_groups,
p.adj = "bonf"
) %>%
tidy()
# Add medians to data.frame
res <- gg_stats %>%
rename(med_1 = med) %>%
right_join(res, by = c("cell_type" = "group1"))
res <- gg_stats %>%
rename(med_2 = med) %>%
right_join(res, by = c("cell_type" = "group2"))
# Format final table
res <- res %>%
mutate(Sample = sample_name) %>%
select(
Sample,
`Cell type 1` = str_c(type_column, ".y"),
`Median OVA FC 1 (log10)` = med_1,
`Cell type 2` = type_column,
`Median OVA FC 2 (log10)` = med_2,
p.value
)
res
}
# Set paths/names
so_paths <- params$sobjs %>%
pull_nest_vec("sobj_in") %>%
map_chr(~ file.path(params$rds_dir, .x))
so_out <- params$sobjs %>%
pull_nest_vec("sobj_out") %>%
map_chr(~ file.path(params$rds_dir, .x))
so_types <- params$sobjs %>%
pull_nest_vec("cell_type")
so_titles <- params$sobjs %>%
pull_nest_vec("title")
# Load and subset objects
# Avoid loading same objects twice
if (!all(file.exists(so_out))) {
sobjs <- unique(so_paths) %>% # Load objects
setNames(., .) %>%
map(read_rds)
sobjs <- sobjs[match(so_paths, names(sobjs))]
names(sobjs) <- names(so_paths)
inc_types <- so_types %>%
map(~ if_else(
.x == "LEC",
list(c("B cell", "T cell", "epithelial")),
list(c("B cell", "T cell"))
)) %>%
flatten()
sobjs <- imap(sobjs, subset_sobj, so_types, inc_types) # Subset objects
sobjs <- imap(sobjs, run_clustifyr, so_types) # Run clustifyr
# Write new objects
sobjs %>%
iwalk(~ write_rds(.x, path = so_out[.y]))
} else {
sobjs <- so_out %>%
map(read_rds)
}
# Color palettes
common_cols <- c(
"Epithelial" = "#6a51a3",
"B Cell" = "#E69F00",
"T Cell" = "#009E73",
"Unassigned" = "#ffffff"
)
so_cols <- so_types %>%
map(
set_type_cols,
sobjs_in = sobjs,
type_key = so_types,
cols_in = ito_cols,
other_cols = common_cols
)
# Parameter lists
fig3_names <- c("d2_DC", "d14_DC", "d14_LEC", "d14_FRC")
fig3_sobjs <- sobjs[fig3_names]
fig3_params <- list(
sobj_in = fig3_sobjs,
plot_title = so_titles[names(fig3_sobjs)],
cols_in = so_cols[names(fig3_sobjs)]
)
corr_params <- list(
sobj_in = sobjs,
plot_title = so_titles[names(sobjs)],
cols_in = so_cols[names(sobjs)]
)
n_plot <- 0# Create panel plots
gg_list <- fig3_params %>%
pmap(
create_fig3,
box_counts = c("Relative ova signal" = "ova_fc"),
umap_counts = c("ova counts" = "adt_ovalbumin"),
data_slot = "counts"
) %>%
flatten()
# Set violin scales equal
violin_idx <- seq_along(gg_list) %% 3 == 2
gg_list[violin_idx] <- gg_list[violin_idx] %>%
equalize_x(log_tran = T)
# Create table of p-values
if (!is.null(params$p_val_xlsx)) {
p_vals <- gg_list[violin_idx] %>%
imap(
calc_p_vals,
data_column = "Relative ova signal",
type_column = "cell_type",
log_tran = T
) %>%
bind_rows()
p_vals %>%
write.xlsx(params$p_val_xlsx)
}
# Create final figure
gg_list %>%
wrap_plots(ncol = 3, widths = c(1, 0.75, 1)) +
plot_annotation(tag_levels = "a") &
theme(
plot.margin = unit(c(1, 0.5, 0.5, 0.5), "cm"),
plot.tag = element_text(size = 24, face = "plain"),
plot.tag.position = c(-0.08, 1)
)Figures for all FRC timepoints
# Figure parameters
FRC_names <- c("d2_FRC", "d14_FRC")
FRC_sobjs <- sobjs[FRC_names]
FRC_params <- list(
sobj_in = FRC_sobjs,
plot_title = so_titles[names(FRC_sobjs)],
cols_in = so_cols[names(FRC_sobjs)],
pt_size = c(1.5, 0.1),
pt_outline = c(2, 0.4),
pt_size_2 = c(1.5, 0.3),
pt_outline_2 = c(2, 0.5)
)
# Create panel plots
gg_list <- FRC_params %>%
pmap(
create_fig3,
box_counts = c("Relative ova signal" = "ova_fc"),
umap_counts = c("ova counts" = "adt_ovalbumin"),
data_slot = "counts"
) %>%
flatten()
# Set violin scales equal
violin_idx <- seq_along(gg_list) %% 3 == 2
gg_list[violin_idx] <- gg_list[violin_idx] %>%
equalize_x(log_tran = T)
# Create final figure
gg_list %>%
wrap_plots(ncol = 3, widths = c(1, 0.75, 1)) +
plot_annotation(tag_levels = "a") &
theme(
plot.margin = unit(c(1, 0.5, 0.5, 0.5), "cm"),
plot.tag = element_text(size = 24, face = "plain"),
plot.tag.position = c(-0.08, 1)
)Clustering for LEC subtype assignment using Xiang et al. reference
# Load reference
load(params$sobj_refs$LEC)
so_in <- sobjs[["d14_LEC"]] %>%
FindNeighbors(
reduction = "pca",
dims = 1:30
)
# Get subtypes for different resolutions/thresholds
resln <- c(0.6, 1, 1.4, 1.8, 3)
r <- seq(0.5, 0.8, 0.1)
res_df <- expand.grid(resln, r) %>%
rename(resln = Var1, r_thresh = Var2) %>%
as_tibble()
plot_df <- map2(res_df$resln, res_df$r_thresh, ~ {
res <- so_in %>%
FindClusters(
resolution = .x,
verbose = F
) %>%
clustify(
ref_mat = ref_LEC_xiang,
cluster_col = "seurat_clusters",
threshold = .y
)
res <- res@meta.data %>%
as_tibble(rownames = "cell_id") %>%
mutate(
subtype = if_else(cell_type == "LEC", type.clustify, subtype),
subtype = str_to_title_v2(subtype),
UMAP_1 = UMAP_1.x,
UMAP_2 = UMAP_2.x,
n_clust = n_distinct(seurat_clusters),
r = r.clustify,
r_thresh = .y
) %>%
select(
cell_id, orig.ident, seurat_clusters,
cell_type, subtype, n_clust,
r, r_thresh, UMAP_1, UMAP_2
)
res
}) %>%
bind_rows()
# Subtype UMAPs
subtype_gg <- plot_df %>%
plot_features(
feature = "subtype",
pt_outline = 0.5,
pt_size = 0.05,
plot_cols = so_cols[["d14_LEC"]],
split_id = c("n_clust", "r_thresh"),
switch = "y"
) +
blank_theme +
labs(subtitle = "Clustifyr Threshold", y = "Number of Clusters") +
guides(color = outline_guide) +
blank_theme +
theme(
plot.subtitle = element_text(hjust = 0.5),
panel.border = element_rect(fill = NA, color = "black", size = 0.2),
legend.title = element_blank(),
axis.title.y = element_text()
)
# Reference UMAPs
clust_guide <- outline_guide
clust_guide$ncol <- 1
ref_gg <- plot_df %>%
mutate(r_thresh = "Clusters") %>%
unique() %>%
plot_features(
feature = "seurat_clusters",
pt_outline = 0.5,
pt_size = 0.05,
split_id = c("n_clust", "r_thresh"),
switch = "y"
) +
blank_theme +
labs(y = "Number of Clusters") +
guides(fill = clust_guide) +
blank_theme +
theme(
plot.subtitle = element_text(hjust = 0.5),
panel.border = element_rect(fill = NA, color = "black", size = 0.2),
legend.title = element_blank(),
axis.title.y = element_text()
)
# Correlation UMAPs
cor_df <- plot_df %>%
mutate(r_thresh = "Correlation") %>%
unique()
cor_gg <- cor_df %>%
ggplot(aes(UMAP_1, UMAP_2, color = r))
walk(unique(cor_df$seurat_clusters), ~ {
pt_1 <- cor_df %>%
filter(
seurat_clusters == .x,
r <= 0.6
)
pt_2 <- cor_df %>%
filter(
seurat_clusters == .x,
r > 0.6
)
cor_gg <<- cor_gg +
geom_point(data = pt_1, color = "black", size = 0.5) +
geom_point(data = pt_1, color = "white", size = 0.05) +
geom_point(data = pt_2, color = "black", size = 0.5) +
geom_point(data = pt_2, size = 0.05)
})
cor_gg <- cor_gg +
facet_grid(n_clust ~ r_thresh, switch = "y") +
labs(y = "Number of Clusters") +
blank_theme +
theme(
panel.border = element_rect(fill = NA, color = "black", size = 0.2),
legend.title = element_blank(),
axis.title.y = element_text()
)
# Create final figure
plot_grid(
ref_gg, cor_gg, subtype_gg,
rel_widths = c(0.4, 0.4, 1),
nrow = 1,
align = "vh",
axis = "tbrl"
)Figures for all LEC timepoints
# Figure parameters
LEC_names <- c("d2_LEC", "d14_LEC")
LEC_sobjs <- sobjs[LEC_names]
LEC_params <- list(
sobj_in = LEC_sobjs,
plot_title = so_titles[names(LEC_sobjs)],
cols_in = so_cols[names(LEC_sobjs)],
pt_size = c(0.5, 0.1),
pt_outline = c(0.8, 0.4),
pt_size_2 = c(0.5, 0.3),
pt_outline_2 = c(0.8, 0.5)
)
# Create panel plots
gg_list <- LEC_params %>%
pmap(
create_fig3,
box_counts = c("Relative ova signal" = "ova_fc"),
umap_counts = c("ova counts" = "adt_ovalbumin"),
data_slot = "counts"
) %>%
flatten()
# Set violin scales equal
violin_idx <- seq_along(gg_list) %% 3 == 2
gg_list[violin_idx] <- gg_list[violin_idx] %>%
equalize_x(log_tran = T)
# Create final figure
gg_list %>%
wrap_plots(ncol = 3, widths = c(1, 0.75, 1)) +
plot_annotation(tag_levels = "a") &
theme(
plot.margin = unit(c(1, 0.5, 0.5, 0.5), "cm"),
plot.tag = element_text(size = 24, face = "plain"),
plot.tag.position = c(-0.08, 1)
)Correlation between RNA counts and ova counts grouped by cell type
corr_params %>%
pmap(
plot_corr,
x = c("RNA counts (log10)" = "nCount_RNA"),
y = c("ova counts (log10)" = "adt_ovalbumin"),
feat = "subtype",
data_slot = "counts",
pt_size = 0.5
) %>%
plot_grid(
plotlist = .,
ncol = 2,
align = "vh",
axis = "trbl"
)Correlation between RNA counts and ova counts grouped by cell type and subtype
pad <- c(2, 2, 10.5, 2, 10.5, 2)
corr_params %>%
pmap(
plot_corr,
x = c("RNA counts (log10)" = "nCount_RNA"),
y = c("ova counts (log10)" = "adt_ovalbumin"),
feat = "subtype",
split_id = "subtype",
data_slot = "counts",
pt_size = 1,
scales = "free",
ncol = 3
) %>%
map2(pad, ~ {
.x + theme(
legend.position = "none",
strip.text = element_text(size = 14),
plot.margin = unit(c(0.2, 0.2, .y, 0.2), "cm")
)
}) %>%
plot_grid(
plotlist = .,
rel_heights = c(1, 1, 1),
align = "v",
ncol = 2
)